This library has been created to enable users to view examples of data visualisations utilising the NHS themes, using NHS public data sets. Users can load the required packages and create the sample data sets, and then choose which data visualisations they would like to run in their own environment.
The foundations of this library are sourced from a GGplot guide by Mike Perham. This has been expanded as a proof of concept for working collaboratively over regions to input and build upon, becoming the R Data Viz Library.
For more information about each data visualisation type, the below are recommended to review:
if (!require("pacman")) install.packages("pacman"); library(pacman)
pacman::p_load(Rcpp, tidyverse,dplyr,tidyr,
ggplot2,ggthemes,ggtext,scales,
png,ggalt,NHSRdatasets,onsr,shinycssloaders, plotly, FunnelPlotR)
# install.packages('devtools')
#devtools::install_github('nhs-r-community/NHSRtheme')
# install.packages("remotes")
remotes::install_github("rOpenSci/fingertipsR",
build_vignettes = TRUE,
dependencies = "suggests",
build = F)
All of the examples in this document use A&E dummy data from the NHSRdatasets package for NHS reporting, fingertips data for Public Health and ONS data for population data. These give us broad datasets that can be used for different data visualisation types.
More information on these packages can be found here:
https://github.com/nhs-r-community/NHSRdatasets
https://nhs-r-community.github.io/NHSRdatasets/
#Load initial dataset and clean up
Attends <- NHSRdatasets::ae_attendances %>%
filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
org_code == "RTK"|org_code == "RA2") %>%
filter(type ==1) %>%
select(-c(3,5))
https://github.com/ropensci/fingertipsR
The below gets population by sex.
# Load fingertipsR package
library(fingertipsR)
# Get available profiles in fingertips
profiles_data <- profiles()
print(profiles_data)
# Find profiles related to population
population_profiles <- profiles_data %>%
filter(grepl("population", ProfileName, ignore.case = TRUE))
print(population_profiles)
# Search for indicators related to population structure, DomainID was found by viewing the population_profiles and selecting a DomainID
population_indicators <- indicator_metadata(DomainID = "1938133081") #Replace with DomainID from above
# Get the relevant indicator of the measure from your list of population_indicators
indicator_id <- 92708 # Replace with the actual indicator ID from the results above
#Get list of area types
area_types_data <- area_types()
# Get the data for the specific indicator
population_data <- fingertips_data(IndicatorID = indicator_id,AreaTypeID = "15") #Note for Area Types you can select all, but it will take a long time. 15 is England.
# Filter data to include only relevant columns and non-NA values
population_data_filtered <- population_data %>%
filter(!is.na(Age), !is.na(Value)) %>%
select(AreaName, Sex, Age, Value)
# Adjust the values for plotting (male values negative for pyramid structure)
population_data_filtered <- population_data_filtered %>%
mutate(Value = ifelse(Sex == "Male", -Value, Value)) %>%
filter(Age != "All ages") %>%
filter(Sex != "Persons")
# Convert the Age column to a factor and specify the levels in the desired order
population_data_filtered$Age <- factor(population_data_filtered$Age, levels = c("0-4 yrs", "5-9 yrs", "10-14 yrs", "15-19 yrs", "20-24 yrs", "25-29 yrs", "30-34 yrs", "35-39 yrs", "40-44 yrs", "45-49 yrs", "50-54 yrs", "55-59 yrs", "60-64 yrs", "65-69 yrs", "70-74 yrs", "75-79 yrs", "80-84 yrs", "85-89 yrs","90+ yrs"))
https://medium.com/@VickyCrockett1/how-do-you-get-data-into-r-from-the-ons-c860043fef8c
The next step loads the data and performs some simple filtering steps.
All of the examples in this document use dummy data from the NHSRdatasets package (more information on this package can be found here: https://github.com/nhs-r-community/NHSRtheme). As the package is not in CRAN, you need to use devtools to load the package from github.
## Using GitHub PAT from the git credential store.
## Skipping install of 'NHSRtheme' from a github remote, the SHA1 (48293555) has not changed since last install.
## Use `force = TRUE` to force installation
## DarkBlue Blue BrightBlue LightBlue AquaBlue Black DarkGrey
## "#003087" "#005EB8" "#0072CE" "#41B6E6" "#00A9CE" "#231f20" "#425563"
## MidGrey PaleGrey DarkGreen Green LightGreen AquaGreen Purple
## "#768692" "#E8EDEE" "#006747" "#009639" "#78BE20" "#00A499" "#330072"
## DarkPink Pink DarkRed Red Orange WarmYellow Yellow
## "#7C2855" "#AE2573" "#8A1538" "#DA291C" "#ED8B00" "#FFB81C" "#FAE100"
#Filter initial dataset
line_df <- Attends %>%
filter(org_code=="RXQ")
#Make plot
ggplot(line_df, aes(x = period, y = attendances)) +
geom_line(colour = "#005EB8", size = 1.5) +
scale_y_continuous(labels = comma) +
labs(title="Type 1 attendances - Bucks Healthcare",
subtitle = "April 2016 to March 2019",
y = "Attendances",
x = "Month") +
expand_limits(y = 0)
NEY TO ADD IN
#Filter initial dataset
multiple_line_df <- Attends %>%
filter(org_code == "RXQ" | org_code=="RTH")
#Make plot
ggplot(multiple_line_df,
aes(x = period, y = attendances, colour = org_code)) +
geom_line(size = 1) +
geom_point() +
scale_colour_manual(values = c("#005EB8", "#41B6E6")) +
scale_y_continuous(labels = comma) +
labs(
title = "Type 1 attendances - Bucks Healthcare vs Royal Berkshire",
subtitle = "April 2016 to March 2019",
y = "Attendances",
x = "Month"
) +
expand_limits(y = 0) +
theme(legend.title = element_blank())
NEY TO ADD IN
NEY TO ADD IN
#Filter initial dataset
bar_df <- Attends %>%
filter(period == "2019-03-01")
#Make plot
bar <- ggplot(bar_df, aes(x = org_code, y = attendances)) +
geom_bar(stat = "identity",
position = "identity",
fill = "#005EB8") +
geom_hline(yintercept = 0,
size = 1,
colour = "#333333") +
scale_y_continuous(labels = comma) +
labs(
title = "Type 1 attendances",
subtitle = "March 2019",
y = "Attendances",
x = "Provider Code"
)
plot(bar)
Add labels
The code below adds labels to your simple bar chart.
#Filter initial dataset
bar + geom_text(aes(label = scales::comma(attendances)), vjust =2, color= "White")
NEY TO ADD IN
#Filter initial dataset
grouped_bar_df <- Attends %>%
filter(period == "2017-03-01" | period == "2019-03-01") %>%
select(c(1:3))
#Make plot
ggplot(grouped_bar_df,
aes(
x = org_code,
y = attendances,
fill = as.factor(period)
)) +
geom_bar(stat = "identity", position = "dodge") +
geom_hline(yintercept = 0,
size = 1,
colour = "#333333") +
scale_y_continuous(labels = comma) +
#NHSRtheme::scale_fill_nhs('blues')+
labs(
title = "Attendances have increased in all providers other than Bucks Healthcare",
subtitle = "March 2017 vs March 2019",
y = "Attendances",
x = "Provider Code"
) +
theme(legend.title = element_blank())
NEY TO ADD IN
AttendsAll <- NHSRdatasets::ae_attendances %>%
filter(
org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
org_code == "RTK" | org_code == "RA2"
) %>%
filter(period == '2017-03-01')
ggplot(AttendsAll, aes(fill = type, y = attendances, x = org_code)) +
geom_bar(position = "stack", stat = "identity") +
scale_y_continuous(labels = comma) +
labs(title = "A&E attendances by department type - March 2017",
y = "Attendances",
x = "Provider Code") +
theme(legend.title = element_blank())
NEY TO ADD IN
AttendsAll <- NHSRdatasets::ae_attendances %>%
filter(
org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
org_code == "RTK" | org_code == "RA2"
) %>%
filter(period == '2017-03-01')
ggplot(AttendsAll, aes(fill = type, y = attendances, x = org_code)) +
geom_bar(position = "fill", stat = "identity") +
scale_y_continuous(labels = percent) +
labs(title = "A&E attendances by department type - March 2017",
y = "Attendances",
x = "Provider Code") +
theme(legend.title = element_blank())
NEY TO ADD IN
#For this example we are filtering on 5 organisations, type 1 activity & excluding column 3 from the dataframe.
Attends <- NHSRdatasets::ae_attendances %>%
filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
org_code == "RTK"|org_code == "RA2")
# Summarise data for type 1 attendances
type_1_summary <- Attends %>%
filter(type == 1) %>%
group_by(org_code, period) %>%
summarise(type_1_attendances = sum(attendances, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'org_code'. You can override using the
## `.groups` argument.
# Summarise data for non-type 1 attendances
type_other_summary <- Attends %>%
filter(type == 3|type== 2) %>%
group_by(org_code, period) %>%
summarise(type_other_attendances = sum(attendances, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'org_code'. You can override using the
## `.groups` argument.
# Summarise total attendances and total admissions
total_summary <- Attends %>%
group_by(org_code, period) %>%
summarise(
total_attendances = sum(attendances, na.rm = TRUE),
total_breaches = sum(breaches, na.rm = TRUE),
total_admissions = sum(admissions, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'org_code'. You can override using the
## `.groups` argument.
# Merge the summaries into a single data frame
final_summary <- total_summary %>%
left_join(type_1_summary, by = c("org_code", "period")) %>%
left_join(type_other_summary, by = c("org_code", "period"))
# Replace NA values with 0 for type_1_attendances and type_3_attendances
final_summary <- final_summary %>%
mutate(
type_1_attendances = replace_na(type_1_attendances, 0),
type_other_attendances = replace_na(type_other_attendances, 0)
)
# Add percentage columns
final_summary <- final_summary %>%
mutate(
perc_admissions_attendances = (total_admissions / total_attendances) * 100,
perc_type1_attendances_total = (type_1_attendances / total_attendances) * 100,
perc_breaches_attendances = (total_breaches / total_attendances) * 100
)
# Filter initial dataset
bubble_df <- final_summary
# Calculate size for bubble chart (proportional to type 1 attendances)
bubble_df <- bubble_df %>%
mutate(size = perc_type1_attendances_total / max(perc_type1_attendances_total) * 100)
# Create bubble chart
ggplot(bubble_df, aes(x = perc_admissions_attendances, y = perc_breaches_attendances, size = size, color = size)) +
geom_point(alpha = 0.5) +
scale_size_continuous(name = "Proportion of type 1") +
#NHSRtheme::scale_fill_nhs('blues', name = "Proportion of type 1") +
labs(title = "Bubble Chart of % 4 Hour Breaches vs % converted to admission with % Attendances Type 1 Size",
x = "Conversion Rate", y = "% 4 Hour Breaches") +
theme(legend.position = "right")
NEY TO ADD IN
# Plot the population pyramid
ggplot(population_data_filtered, aes(x = Age, y = Value, fill = Sex)) +
geom_bar(stat = "identity", position = "identity") +
coord_flip() +
scale_y_continuous(labels = function(x) comma(abs(x))) +
labs(title = "Population Age Profile by Gender",
x = "Age Group",
y = "Population Count",
fill = "Gender") +
NHSRtheme::scale_fill_nhs("blues")
NEY TO ADD IN
Insert here
Insert here
# Create a radial column chart
ggplot(Attends, aes(x = reorder(org_code, -attendances), y = attendances)) +
geom_col(width = 0.5, fill = "skyblue") +
coord_polar(start = 0) +
#NHSRtheme::scale_fill_nhs("blues") +
scale_y_continuous(labels = function(x) comma(abs(x))) +
labs(title = "Attendances per Trust",
x = NULL, y = NULL)
NEY TO ADD IN
# Summarize the data
summary_data <- Attends %>%
group_by(org_code) %>%
summarise(min_attendance = min(attendances),
max_attendance = max(attendances))
# Create the horizontal bar range chart
ggplot(summary_data, aes(y = org_code)) +
geom_linerange(aes(xmin = min_attendance, xmax = max_attendance), color = "blue", size = 1.5) +
labs(title = "Range of Type 1 Attendances by Trust between 2019 and 2023",
x = "Number of Attendances",
y = "Organisation Code") +
NHSRtheme::scale_fill_nhs("blues")
NEY TO ADD IN
# Plot the stacked area chart
ggplot(Attends, aes(x = period, fill = org_code)) +
geom_area(stat = "count") +
NHSRtheme::scale_fill_nhs("blues")
NEY TO ADD IN
Use facet_wrap() to create multiple charts split by
subgroup in data. You can use ncol = or nrow
to specify number of rows or columns. For example,
facet_wrap(~org_code, nrow=1) to put all charts in a single
row.
ggplot(Attends, aes(x = period, y = attendances)) +
geom_line(colour = "#005EB8", size = 1) +
facet_wrap(~org_code)+
labs(title="Type 1 attendances",
subtitle = "April 2016 to March 2019") +
expand_limits(y = 0)
g <- ggplot(Attends, aes(org_code, attendances))
g + geom_boxplot(varwidth=T, fill="light blue") +
labs(title="A&E Attendances",
subtitle="Distribution by Trust",
caption="Source: A&E Monthly Stats",
x="Trust",
y="A&E attendances")
Insert here
Insert here
Insert here
MyAttends <- NHSRdatasets::ae_attendances %>%
filter(period == '2017-03-01') %>%
filter(type ==1) # %>%
# select(-c(3,5))
MyAttends$org_code <- as.character(MyAttends$org_code)
funnel_plot(.data = MyAttends,
numerator= breaches, #Specify the numerator
denominator=attendances, #Specify the denominator
group = org_code, #Specify that we want to plot Trust Names
title = "A&E breaches", #Specify the chart title
draw_adjusted = TRUE, #Specify that we want to adjust the control limits to account for over-dispersion
sr_method = "SHMI", #Specify to adjust for over-dispersion using the CQC Methodology (can also use SHMI)
# label = "highlight", #Specify that we want to use the 'highlight' argument to show outliers
# highlight=HighLight, #Get the highlight argument to reference the list of outlier NEY trusts
data_type="PR", #Specify the indicator is a proportion
limit=95, #Specify to show both 95 and 99.8% control limits
multiplier = 100,
y_label = "% breaches", #Specify the X Axis Label
x_label = "No. of attendances") #Specify the Y Axis Label
## A funnel plot object with 139 points of which 13 are outliers.
## Plot is adjusted for overdispersion.
NEY TO ADD IN
Insert here
Insert here
Insert here
#Violin Plot
ggplot(Attends, aes(org_code, attendances)) + geom_violin() +
labs(title="A&E Attendances", subtitle="Range by Trust", caption="Source: A&E Monthly Statistics", x="Trust", y="Attendances") + scale_fill_brewer(palette="Blues") + theme_classic()
The ggtext package can be used to add colour to titles
or subtitles. You need to ensure that you use it with
theme(plot.subtitle = element_markdown(hjust = 0, size = 12))
otherwise it will not work.
#Prepare data
dumbbell_df <- NHSRdatasets::ae_attendances %>%
filter(type ==1) %>%
select(-c(3,6)) %>%
filter(period == "2017-03-01" | period =="2019-03-01") %>%
mutate(period =as.numeric(format(period,'%Y'))) %>%
mutate(period = as.character(period)) %>%
mutate(performance = 1- (breaches/attendances)) %>%
select(c(1:2,5)) %>%
spread(period, performance) %>%
mutate(gap = `2019` - `2017`) %>%
arrange(desc(gap)) %>%
head(10)
#Make plot
dumbell <- ggplot(dumbbell_df, aes(x = `2017`, xend = `2019`, y = reorder(org_code, gap), group = org_code)) +
geom_dumbbell(colour = "#dddddd",
size = 3,
colour_x = "#41B6E6",
colour_xend = "#005EB8") +
scale_x_continuous(labels = scales::percent_format(accuracy=1))+
geom_vline(xintercept = 0.95, size = 1, colour="#333333", linetype = "dashed") +
labs(title = "Performance improved for all providers",
subtitle = "<span style='color: #41B6E6;'>March 2017 <span><span style='color: black;'> vs <span><span style='color: #005EB8;'>March 2019<span>") +
xlab("4 hour performance") +
ylab("Org code") +
theme(plot.subtitle = element_markdown(hjust = 0, size = 12))+ theme(legend.position = "none")
plot(dumbell)
Adding annotations
You can use geom_label to add annotations to existing
plots or you can add line in when creating ggplot.
dumbell + geom_label(aes(x = 0.9, y = "R1K",label = "Standard"),
hjust = -0.5,
vjust = -0.1,
colour = "#555555",
label.size = NA,
family="Arial",
size = 4)
dumbbellplotly_df <- dumbbell_df
dumbbellplotly_df$org_code <- as.character(dumbbellplotly_df$org_code)
dumbbellplotly_df$`2017`<- round(dumbbellplotly_df$`2017`*100,1)
dumbbellplotly_df$`2019`<- round(dumbbellplotly_df$`2019`*100,1)
dumbbellplotly_df$`gap`<- round(dumbbellplotly_df$`gap`*100,1)
## Region chart This sets up a blank plotly object to begin with
dumbellplotly<- plot_ly(dumbbellplotly_df, color = I("gray80"))
## It then adds a line for each 'org_code that starts at 2017 and goes through to the 2019 number
dumbellplotly <-dumbellplotly %>% add_segments(x = ~dumbbellplotly_df$`2017`, xend = ~dumbbellplotly_df$`2019`, y = ~org_code, yend = ~org_code, showlegend = FALSE)
# It then adds a dot on the 2017 end of the line in red
dumbellplotly <- dumbellplotly %>% add_markers(x = ~dumbbellplotly_df$`2017`, y = ~org_code, name = "2017", color = I("#AE2573"),size = 8 )
# It then adds a dot on the 2019 end of the line in blue
dumbellplotly <- dumbellplotly %>% add_markers(x = ~dumbbellplotly_df$`2019`,y = ~org_code, name = "2019", color = I("#0072CE"),size = 8 )
#The titles are then added
dumbellplotly<- dumbellplotly %>% layout( title = "Performance improved for all providers",
xaxis = list(title = "4 hour performance", ticksuffix="%"), margin = list(l = 65),
shapes = list(list(type = "line",
x0 = 95,
x1 = 95,
y0 = 0,
y1 = "RXC",
ref = "x",
yref = "y",
line = list(color="#333333",dash = "dash"))),
annotations = list(
list(
x = 95,
y = "RXC",
xref = "x",
yref = "y",
text = "Standard (95%)",
showarrow = TRUE,
arrowhead = 2,
# ax = 20,
# ay = -30,
font = list(color = "555555#")
)))
dumbellplotly
NEY TO ADD IN